import pandas as pd
import numpy as np
import statsmodels.api as sm

# Define the file path to the research project's replication package.
# NOTE: a raw string cannot end in a single backslash, hence the doubled one;
# the resulting value therefore ends with two backslash characters.
filepath = r'C:\Users\mehran.azimi\\'

# Output file name (written into `filepath`)
file_name = 'Sum_Stats.xlsx'

# Inclusive bounds applied to the `yearmo` column (presumably YYYYMM integers
# -- confirm against AllData.csv).
begdate = 199002
enddate = 202212

# Candidate variables for the summary-statistics table
_vars = ['sii', 'gp', 'alpha', 'ogap_dyn', 'skvw', 'dy', 'dp', 'lty', 'avgcor', 'wtexas', 'bm', 'tbl', 'vrp', 'tail', 'ep', 'svar', 'infl', 'tms', 'eg', 'dfy', 'de', 'ndrbl', 'ntis', 'fbm', 'lzrt', 'dtoat', 'dfr', 'ltr']


def filter_date_range(data: pd.DataFrame, start: int, end: int) -> pd.DataFrame:
    """Return the rows of *data* whose ``yearmo`` lies in ``[start, end]``.

    Both bounds are inclusive. The original row index is preserved.
    """
    in_window = (data['yearmo'] >= start) & (data['yearmo'] <= end)
    return data[in_window]


def complete_columns(data: pd.DataFrame, row_mask, candidates: list) -> list:
    """Return the *candidates* that have no missing value in the masked rows.

    *row_mask* is a boolean selector over the rows of *data*. The result
    keeps the order of *candidates*.
    """
    return data.loc[row_mask, candidates].dropna(axis=1).columns.to_list()


def remaining_vars(candidates: list, complete: list, always_include=('alpha',)) -> list:
    """Return the candidates that are not in *complete*, plus *always_include*.

    The result follows the order of *candidates* (the previous set-difference
    produced an arbitrary order) and each name appears exactly once: an
    ``always_include`` entry is appended only if it is not already present,
    so no duplicated column is selected later on.
    """
    done = set(complete)
    rest = [name for name in candidates if name not in done]
    for name in always_include:
        if name not in rest:
            rest.append(name)
    return rest


def summary_table(data: pd.DataFrame, columns: list) -> pd.DataFrame:
    """Build the summary-statistics table for *columns* of *data*.

    Takes ``describe()``, drops the ``count``, ``25%`` and ``75%`` rows,
    transposes so that each variable is a row, and sorts the rows by
    variable name.
    """
    stats = data[columns].describe()
    stats = stats.drop(['count', '25%', '75%'], axis=0)
    return stats.transpose().sort_index()


if __name__ == "__main__":
    # Load data and filter by date range
    data_Inv = pd.read_csv(filepath + 'AllData.csv')
    data_Inv = filter_date_range(data_Inv, begdate, enddate)
    # Kept without drop=True: the old index stays available as an 'index' column.
    data_Inv = data_Inv.reset_index()

    # Variables fully observed on the last sample date vs. the remaining ones
    date_filter = data_Inv['date'] == '12/31/2022'
    indepvars_2022 = complete_columns(data_Inv, date_filter, _vars)
    indepvars_2021 = remaining_vars(_vars, indepvars_2022)

    # To get results for variables that end in 2021, substitute indepvars_2022 with indepvars_2021 below
    summary_table(data_Inv, indepvars_2022).to_excel(filepath + file_name)

